# -*- coding: utf-8 -*-

# Scrapy settings for crawler project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings by consulting the documentation:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html

# Name of the bot implemented by this Scrapy project.
BOT_NAME = "PySearch"

# Package in which the `genspider` command creates new spiders.
NEWSPIDER_MODULE = "crawler.spiders"
# Modules Scrapy searches for spiders; here it is the same single package.
SPIDER_MODULES = [NEWSPIDER_MODULE]


# Proxies fail often, so allow each request up to 3 retries after the
# first download attempt.
RETRY_TIMES = 3

# Proxies fail for many different reasons, so retry on most error codes,
# including client errors that would normally be treated as permanent.
# NOTE(review): confirm retrying 400/403/404 is still wanted while the proxy
# middleware is disabled.
RETRY_HTTP_CODES = [
    500, 503, 504,       # server-side errors
    400, 403, 404, 408,  # client-side errors
]

# File holding the list of proxies.
# NOTE(review): presumably read by RandomProxyMiddleware, and the relative
# path presumably resolves against the directory the crawl is started from
# -- confirm both.
PROXY_LIST = "conf/proxyList.txt"

# No custom downloader middlewares are active: every entry below is commented
# out, so this setting is an empty dict. Uncomment the entries to route
# requests through random proxies.
# NOTE(review): the `scrapy.contrib.downloadermiddleware` paths are the
# pre-1.0 module locations (later `scrapy.downloadermiddlewares`) -- check
# them against the installed Scrapy version before enabling.
DOWNLOADER_MIDDLEWARES = {
    # 'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 3,
    # 'crawler.middleware.randomProxy.RandomProxyMiddleware': 100,
    # 'scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware': 110,
}


# Item pipelines, keyed by import path. Scrapy runs them in ascending order
# of the assigned value, so Validator sees each item before FilePersistant.
ITEM_PIPELINES = {
    "crawler.pipelines.Validator": 1,
    "crawler.pipelines.FilePersistant": 2,
}

# Item class used by default when instantiating items (e.g. in the Scrapy
# shell).
DEFAULT_ITEM_CLASS = "crawler.items.ArticleItem"

# Logging is switched on; messages below INFO (i.e. DEBUG) are not recorded.
LOG_ENABLED = True
LOG_LEVEL = "INFO"

# Log output goes to this file.
# NOTE(review): relative path -- presumably resolved against the directory
# the crawl is started from; confirm the `log/` directory exists there.
LOG_FILE = "log/PySearch.log"


